/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Created on: 12/28/2022
Last Modified on: 2/17/2024

Description: This program generates investigator-level placement and outcome
rates, as well as standard errors for these estimates, among subgroups of 
children and investigations.

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

**************************
**(0) SETUP
**************************
* Start from a clean session: no data in memory, no paging of output,
* no leftover macros from earlier runs, and no open log file.
clear
set more off
* "_all" is the keyword that drops every macro; "macro drop all" would only
* target a single global macro literally named "all".
macro drop _all
capture log close

*Set directories
* The paths are intentionally blank (removed for confidentiality). When filling
* them in, end each path with a directory separator: the globals are
* concatenated directly with file names below (e.g. "${clean}file.dta").
*   clean     - input folder holding child_investigation_sample.dta
*   cleandata - output folder for the investigator-level datasets
*   tmpdata   - folder holding sample_inv_2017_2019.dta
global clean 
global cleandata 
global tmpdata 


**************************
**(1) GENERATE SAMPLE OF INTEREST, BY SUBGROUP
**************************
foreach xvar in female male young old abuse neg {
* Build the analysis sample for the subgroup named by the loop macro xvar.
* Everything below runs once per subgroup, starting from the raw sample.
use "${clean}child_investigation_sample.dta", clear 

*Drop duplicates by victim and investigation - should be a victim X inv sample 
gegen tag=tag(vicid inv_caseid)
keep if tag==1 

*This sample is from January 1 2008 to October 23 2017 - append to data from April 24 2017 to June 30 2019 (drop calls from April 24 2017 to October 23 2017)
* 20933 is 24apr2017 expressed as a Stata daily date.
keep if complaint_date<20933
append using "${tmpdata}sample_inv_2017_2019.dta"
format cw_date_stata %td
* Appended rows carry their date in cw_date_stata; copy it over where
* complaint_date is missing.
replace complaint_date = cw_date_stata if complaint_date==.
drop cw_date_stata 

**SAMPLE RESTRICTIONS
*Keep only non-repeat investigations  
sort vicid complaint_date inv_caseid, stable  
cap drop diff 
* Days since the child's previous investigation. The within-child order is
* stated explicitly in the by-prefix so the gap never depends on an implicit
* sort order left over from an earlier command.
bysort vicid (complaint_date inv_caseid): gen diff = complaint_date[_n] - complaint_date[_n-1]
order diff 

* diff is missing for a child's first investigation, which is always kept.
drop if diff<365 & diff!=.

*Drop sexual abuse cases 
drop if sexab==1 

*Drop observations with missing zipcodes 
drop if zipcode_vic==. 

*Keep only white and black children
keep if white==1 | black==1 

*Gen all subgroups 
* NOTE(review): these indicators are built with logical expressions, so a
* missing female is coded male==0 and a missing age_inv is coded young==0 and
* old==1 (missing compares as larger than any number). Confirm that female and
* age_inv are never missing in this sample.
gen male=female==0
gen young = age_inv <7 
gen old = young==0
gen abuse = (phyab==1) 
gen neglect = abuse==0 

*Keep if subgroup is one 
* The loop token "neg" is not itself a variable; map it to neglect explicitly
* instead of relying on variable-name abbreviation. The token is still used
* unchanged in output file names.
local subgroup = cond("`xvar'"=="neg", "neglect", "`xvar'")
keep if `subgroup'==1

*Limit to investigators with at least 100 investigations
bysort worker_id: gen n=_N
drop if n<100

*Generate rotation and drop "trivial rotations"
* A rotation group is a zipcode-by-year cell; cells with a single observation
* are dropped. The tab is diagnostic output only.
egen rotationgroup = group(zipcode_vic cps_year)
bysort rotationgroup: gen nobs = _N 
tab nobs if nobs<10 
drop if nobs ==1 

*Drop investigators who were *only* assigned to white or black children 
* Zero within-investigator variance in a race dummy means every case handled
* by that investigator has the same value of the dummy.
gegen tmp = var(white), by(worker_id)
order tmp 
drop if tmp==0 
drop tmp 

gegen tmp = var(black), by(worker_id)
order tmp 
drop if tmp==0 
drop tmp 

*Drop observations that we can't follow for at least six months 
* Applied only to rows dated before 24apr2017 (20933).
drop if (postm1_inv==. | postm2_inv==. | postm3_inv==. | postm4_inv==. | postm5_inv==. | postm6_inv==.) & complaint_date<20933
rename black d_black

*Generate main outcomes 
* inv_Jm equals 1 if any of postm1_inv through postmJ_inv equals 1, else 0.
* The summarize calls only print progress diagnostics.
forvalues j = 1(1)6 {
gen inv_`j'm = 0 
forvalues i = 1/`j' {
	sum postm`i'_inv
	replace inv_`j'm = 1 if postm`i'_inv==1
	sum inv_`j'm* 
}
}

* Fill inv6m from the constructed six-month indicator where it is missing.
replace inv6m = inv_6m if inv6m==.

* The six-month outcome is left undefined for children with fc==1.
foreach x in inv6m {
	replace `x'=. if fc==1
}

sum inv*m

*Generate remaining variables
gen nofc=fc==0

//Count of cases
cap drop count_inv
bys worker_id: gen long count_inv = _N

// Count of cases by investigator by race:
*rename pre_black d_black
bys worker_id: egen count_black = total(d_black)
gen nonblack = (d_black==0)
bys worker_id: egen count_white = total(nonblack)
bys worker_id: egen share_black=mean(d_black)

* Sample-wide share of Black children, stored as a constant column.
sum d_black 
local bshare = r(mean)
gen bshare = `bshare'
* Remove any pre-existing pre_black so the rename below cannot collide with it.
cap drop pre_black 
rename d_black pre_black 

**************************
**(2) ESTIMATE INVESTIGATOR-SPECIFIC RATES AND STANDARD ERRORS
**************************
* Pass 1: investigator-by-race rates with reghdfe's default standard errors.
* Works on a preserved copy of the subgroup sample and saves one row per
* investigator to inv_rates_<subgroup>_qje.dta.
preserve 
// Collect the investigator ids directly into a local macro (avoids routing
// the potentially very long list through a string expression).
levelsof worker_id, local(levels)

// loop over outcomes 
foreach var in fc inv6m {

	// Placement outcome (fc): use the entire sample.
	if "`var'"=="fc" {
		reghdfe `var' i.worker_id i.worker_id#i.pre_black, ///
			absorb(rotationgroup) resid
	}
	// Other outcomes: condition on leave at home (fc==0).
	else {
		reghdfe `var' i.worker_id i.worker_id#i.pre_black if fc==0, ///
			absorb(rotationgroup) resid
	}
	
		gen beta_gap_`var' = .
		gen beta_base_`var' = .
		gen se_gap_`var' = .
		gen se_base_`var' = .
				
		// Copy each investigator's coefficients onto that investigator's rows.
		// capture lets the loop continue when a coefficient cannot be
		// referenced for a given investigator.
		foreach i in `levels' {
		
			// retrieve gaps (investigator x Black interaction)
			cap replace beta_gap_`var' =  _b[`i'.worker_id#1.pre_black] if worker_id == `i'
			cap replace se_gap_`var' = _se[`i'.worker_id#1.pre_black] if worker_id == `i'
			
			// retrieve base values (investigator main effect)
			cap replace beta_base_`var' =  _b[`i'.worker_id] if worker_id == `i'			
			cap replace se_base_`var'= _se[`i'.worker_id] if  worker_id==`i'			
	   }
	   
	// aggregate outcomes
	// E[Y|inv,r] = invFE + invFE*black + g*E[X_i]
   
	local exp 0
   
	// retrieve average of absorbvar:
	// d = xbd - xb is the absorbed rotation-group component of the prediction;
	// its sample mean is added to every investigator's rate below.
	
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb 
	sum d 
	
	local exp = `exp'+ `r(mean)' 
	
	// replace base value equal to zero for omitted inv
	replace beta_base_`var'=0 if beta_base_`var'==.
	
	// generate aggregate outcomes
	local baselin "_cons"
	
	gen w_fe_`var'=.
	gen b_fe_`var'=.
	gen w_se_fe_`var'=.
	gen b_se_fe_`var'=.
	
	// quietly
	qui {
	// NOTE(review): both loops below treat worker_id 1 as the omitted (base)
	// investigator. Confirm that 1 is the base level in every subgroup sample.

	// white children: _cons + investigator effect + mean absorbed effect
	foreach i in `levels'{
	
		if `i'==1 {
		
			local lin "`baselin'"
			lincom `lin' + `exp'
			replace w_fe_`var'= `r(estimate)' if worker_id==`i'
			replace w_se_fe_`var'=`r(se)' if worker_id==`i'			
		}
		
		else {
			
			local lin "`baselin' + _b[`i'.worker_id]"
			lincom `lin' + `exp'
			replace w_fe_`var' = `r(estimate)' if worker_id==`i'
			replace w_se_fe_`var'=`r(se)' if worker_id==`i'			
		}
	}	
	
	// black children: as above, plus the investigator x Black interaction
	foreach i in `levels'{
	
		if `i'==1 {
		
			local lin "`baselin' + _b[`i'.worker_id#1.pre_black]"
			lincom `lin' + `exp'
			replace b_fe_`var' = `r(estimate)' if worker_id==`i'
			replace b_se_fe_`var'=`r(se)' if worker_id==`i'			
		}
		
		else {
			
			local lin "`baselin' + _b[`i'.worker_id] + _b[`i'.worker_id#1.pre_black]"
			lincom `lin' + `exp'
			replace b_fe_`var' = `r(estimate)' if worker_id==`i'
			replace b_se_fe_`var'=`r(se)' if worker_id==`i'			
		}
	}
		
	// The prediction helpers are recreated for the next outcome.
	drop xbd xb d
	
	}
	 
}

*Save investigator-level dataset:
* The kept variables are constant within investigator, so keeping one row per
* worker_id loses no information.
keep se_* beta_* b_* w_* worker_id count_inv bshare
duplicates drop worker_id, force 	
drop if worker_id==.

save "${cleandata}inv_rates_`xvar'_qje.dta", replace
restore 


* Pass 2: same regressions as the first pass, but with standard errors
* clustered two ways on worker_id and vicid. Only the standard errors are
* kept; they are saved to inv_se_<subgroup>_qje.dta, one row per investigator.
preserve 
// Collect the investigator ids directly into a local macro (avoids routing
// the potentially very long list through a string expression).
levelsof worker_id, local(levels)

// loop over outcomes 
foreach var in fc inv6m {

	// Placement outcome (fc): use the entire sample.
	if "`var'"=="fc" {
		reghdfe `var' i.worker_id i.worker_id#i.pre_black, ///
			absorb(rotationgroup) resid cluster(worker_id vicid)
	}
	// Other outcomes: condition on leave at home (fc==0).
	else {
		reghdfe `var' i.worker_id i.worker_id#i.pre_black if fc==0, ///
			absorb(rotationgroup) resid cluster(worker_id vicid)
	}
	
		gen twse_gap_`var' = .
		gen twse_base_`var' = .
				
		// Copy each investigator's clustered standard errors onto that
		// investigator's rows. capture lets the loop continue when a
		// coefficient cannot be referenced for a given investigator.
		foreach i in `levels' {
		
			// retrieve gaps (investigator x Black interaction)
			cap replace twse_gap_`var' = _se[`i'.worker_id#1.pre_black] if worker_id == `i'
			
			// retrieve base values (investigator main effect)
			cap replace twse_base_`var'= _se[`i'.worker_id] if  worker_id==`i'			
	   }
	   
	// aggregate outcomes
	// E[Y|inv,r] = invFE + invFE*black + g*E[X_i]
   
	local exp 0
   
	// retrieve average of absorbvar:
	// d = xbd - xb is the absorbed rotation-group component of the prediction;
	// its sample mean enters every linear combination below as a constant.
	
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb 
	sum d 
	
	local exp = `exp'+ `r(mean)' 
	
	// generate aggregate outcomes
	local baselin "_cons"
	
	gen w_twse_fe_`var'=.
	gen b_twse_fe_`var'=.
	
	// quietly
	qui {
	// NOTE(review): both loops below treat worker_id 1 as the omitted (base)
	// investigator. Confirm that 1 is the base level in every subgroup sample.

	// white children: SE of _cons + investigator effect
	foreach i in `levels'{
	
		if `i'==1 {
		
			local lin "`baselin'"
			lincom `lin' + `exp'
			replace w_twse_fe_`var'=`r(se)' if worker_id==`i'			
		}
		
		else {
			
			local lin "`baselin' + _b[`i'.worker_id]"
			lincom `lin' + `exp'
			replace w_twse_fe_`var'=`r(se)' if worker_id==`i'			
		}
	}	
	
	// black children: as above, plus the investigator x Black interaction
	foreach i in `levels'{
	
		if `i'==1 {
		
			local lin "`baselin' + _b[`i'.worker_id#1.pre_black]"
			lincom `lin' + `exp'
			replace b_twse_fe_`var'=`r(se)' if worker_id==`i'			
		}
		
		else {
			
			local lin "`baselin' + _b[`i'.worker_id] + _b[`i'.worker_id#1.pre_black]"
			lincom `lin' + `exp'
			replace b_twse_fe_`var'=`r(se)' if worker_id==`i'			
		}
	}
		
	// The prediction helpers are recreated for the next outcome.
	drop xbd xb d
	
	}
	 
}

*Save investigator-level dataset:
* The kept variables are constant within investigator, so keeping one row per
* worker_id loses no information.
keep worker_id b_twse_* w_twse_* twse_*
duplicates drop worker_id, force 	
drop if worker_id==.

save "${cleandata}inv_se_`xvar'_qje.dta", replace
restore 
}

*Merge standard errors and rates, and save final dataset:
*Merge standard errors and rates, and save final dataset:
* For each subgroup, attach the two-way clustered standard errors to the
* investigator-level rates, convert the placement rates into leave-at-home
* rates, and save inv_rates_se_<subgroup>_qje.dta.
foreach xvar in female male young old abuse neg {
	use "${cleandata}inv_rates_`xvar'_qje.dta", clear 
	* keep(1 3): retain every investigator in the rates file, matched or not.
	merge 1:1 worker_id using "${cleandata}inv_se_`xvar'_qje.dta", keep(1 3) keepus(b_twse_* w_twse_* twse_*)
	
	* Censor the estimated placement rates to the unit interval, then flip
	* them to 1 - rate. Missing values compare as larger than any number, so
	* the upper bound is guarded with !missing(); otherwise a missing rate
	* would be set to 1 and end up as 0 after the flip.
	foreach x in b_fe_fc w_fe_fc {
		replace `x' = 0 if `x'<0 
		replace `x' = 1 if `x'>1 & !missing(`x')
		replace `x'= 1-`x'
	}
	
	* D_* are the flipped (1 - fc) rates and Y_* the six-month outcome rates,
	* for white (w) and Black (b) children. The fc standard errors are renamed
	* to nofc to match the flipped rates.
	rename (w_fe_fc b_fe_fc w_fe_inv6m b_fe_inv6m) (D_w D_b Y_w Y_b)
	rename (w_se_fe_fc b_se_fe_fc) (w_se_fe_nofc b_se_fe_nofc)
	
	* Squared rates.
	gen D_w2 = D_w*D_w 
	gen D_b2 = D_b*D_b
	save "${cleandata}inv_rates_se_`xvar'_qje.dta", replace 
}